package net.lzzy.twocarsort;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.RelationalGroupedDataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;

/**
 * Small Spark SQL demo: reads {@code src/student.json}, groups the rows by the
 * {@code institute} column and prints, for every group, the maximum, minimum and
 * average (rounded to two decimals) of {@code age} together with the row count.
 *
 * <p>The printed columns are, in order: {@code institute}, {@code max(age)},
 * {@code min(age)}, {@code avg(age)} and {@code count}.
 */
public class GroupBy {
    /**
     * Runs the aggregation on a local Spark master and prints the result to stdout.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkSession ss = SparkSession.builder()
                // Label the job with this class so it is identifiable in the Spark UI/logs.
                .appName(GroupBy.class.getName())
                .master("local")
                .getOrCreate();
        try {
            // NOTE(review): assumes `age` is inferred as a numeric column from the JSON - confirm.
            Dataset<Row> students = ss.read().json("src/student.json");
            RelationalGroupedDataset byInstitute = students.groupBy("institute");

            // Compute every statistic in one aggregation instead of four separate
            // aggregations glued together with inner joins: it is a single pass over
            // the data, and it does not lose the group whose `institute` is null
            // (null keys never match in an equi-join).
            Dataset<Row> stats = byInstitute.agg(
                    functions.max("age").alias("max(age)"),
                    functions.min("age").alias("min(age)"),
                    functions.round(functions.avg("age"), 2).alias("avg(age)"),
                    functions.count(functions.lit(1)).alias("count"));

            stats.show();
        } finally {
            // Always release the session (and its SparkContext), even if the job fails.
            ss.stop();
        }
    }
}
